{
    "cells": [
        {
            "cell_type": "code",
            "execution_count": 62,
            "id": "4921c412",
            "metadata": {},
            "outputs": [],
            "source": [
                "from gpt_index import GPTListIndex, SimpleDirectoryReader, LLMPredictor, PromptHelper, ServiceContext\n",
                "from gpt_index.response.notebook_utils import display_response\n",
                "from langchain import OpenAI\n",
                "from langchain.chat_models import ChatOpenAI\n",
                "from IPython.display import Markdown, display"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 3,
            "id": "261d923e",
            "metadata": {},
            "outputs": [],
            "source": [
                "documents = SimpleDirectoryReader('data').load_data()"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "f23b5169",
            "metadata": {},
            "source": [
                "# davinci-003"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 34,
            "id": "0c635cdb",
            "metadata": {},
            "outputs": [],
            "source": [
                "llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=\"text-davinci-003\"))\n",
                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 35,
            "id": "b8ad1a2a",
            "metadata": {},
            "outputs": [
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "INFO:gpt_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
                        "INFO:gpt_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 0 tokens\n"
                    ]
                }
            ],
            "source": [
                "davinci_index = GPTListIndex.from_documents(documents, service_context=service_context)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 45,
            "id": "c9925597",
            "metadata": {},
            "outputs": [
                {
                    "data": {
                        "text/plain": [
                            "'Document is split into 6 nodes.'"
                        ]
                    },
                    "execution_count": 45,
                    "metadata": {},
                    "output_type": "execute_result"
                }
            ],
            "source": [
                "f'Document is split into {len(davinci_index._index_struct.nodes)} nodes.'"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 68,
            "id": "fa1d7242",
            "metadata": {},
            "outputs": [
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "INFO:gpt_index.indices.common.tree.base:> Building index from nodes: 5 chunks\n",
                        "INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 19882 tokens\n",
                        "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens\n"
                    ]
                }
            ],
            "source": [
                "response = davinci_index.query(\n",
                "    \"What happened on one night in October 2003?\", \n",
                "    response_mode=\"tree_summarize\"\n",
                ")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 69,
            "id": "d758bdb7",
            "metadata": {
                "scrolled": true
            },
            "outputs": [
                {
                    "data": {
                        "text/markdown": [
                            "**`Final Response:`** It is not possible to answer this question with the given context information."
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 1/6`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** What I Worked On\n",
                            "\n",
                            "February 2021\n",
                            "\n",
                            "Before college the two main things I worked on, outside of schoo...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 2/6`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** whereby the students wouldn't require the faculty to teach anything, and in return the faculty wo...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 3/6`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** fact that our software worked via the web, and we got $10,000 in seed funding from Idelle's husba...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 4/6`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** project was the new Lisp, whose parentheses I now wouldn't even have to hide. A lot of Lisp hacke...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 5/6`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** chance it had to do with HN, and a 40% chance it had do with everything else combined. [17]\n",
                            "\n",
                            "As w...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 6/6`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** and some people dislike being told such things.\n",
                            "\n",
                            "[11] People put plenty of stuff on the internet ...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                }
            ],
            "source": [
                "display_response(response)"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "3f843a73",
            "metadata": {},
            "source": [
                "# gpt-4"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 63,
            "id": "0849d860",
            "metadata": {},
            "outputs": [],
            "source": [
                "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4\"))\n",
                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 64,
            "id": "bb9eff4a",
            "metadata": {},
            "outputs": [
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "INFO:gpt_index.token_counter.token_counter:> [build_index_from_documents] Total LLM token usage: 0 tokens\n",
                        "INFO:gpt_index.token_counter.token_counter:> [build_index_from_documents] Total embedding token usage: 0 tokens\n"
                    ]
                }
            ],
            "source": [
                "gpt4_index = GPTListIndex.from_documents(documents, service_context=service_context)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 65,
            "id": "cb56a205",
            "metadata": {},
            "outputs": [
                {
                    "data": {
                        "text/plain": [
                            "'Document is split into 3 nodes.'"
                        ]
                    },
                    "execution_count": 65,
                    "metadata": {},
                    "output_type": "execute_result"
                }
            ],
            "source": [
                "f'Document is split into {len(gpt4_index._index_struct.nodes)} nodes.'"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 70,
            "id": "44dda700",
            "metadata": {},
            "outputs": [
                {
                    "name": "stderr",
                    "output_type": "stream",
                    "text": [
                        "INFO:gpt_index.indices.common.tree.base:> Building index from nodes: 2 chunks\n",
                        "INFO:gpt_index.token_counter.token_counter:> [query] Total LLM token usage: 18006 tokens\n",
                        "INFO:gpt_index.token_counter.token_counter:> [query] Total embedding token usage: 0 tokens\n"
                    ]
                }
            ],
            "source": [
                "response = gpt4_index.query(\n",
                "    \"What happened on one night in October 2003?\", \n",
                "    response_mode=\"tree_summarize\"\n",
                ")"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": 71,
            "id": "42bd0984",
            "metadata": {
                "scrolled": true
            },
            "outputs": [
                {
                    "data": {
                        "text/markdown": [
                            "**`Final Response:`** On one night in October 2003, there was a big party at Paul Graham's house, organized by his friend Maria Daniels. At this party, Paul met Jessica Livingston, who would later become his partner in starting Y Combinator. Additionally, Paul Graham had a conversation with his friend Robert Morris about starting a new kind of venture firm that would fund startups in batches, which eventually led to the creation of Y Combinator."
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 1/3`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** 0.740238551627948<br>**Text:** What I Worked On\n",
                            "\n",
                            "February 2021\n",
                            "\n",
                            "Before college the two main things I worked on, outside of schoo...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 2/3`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** really good. He recommended Trevor Blackwell, which surprised me at first, because at that point ...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "---"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**`Source Node 3/3`**"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                },
                {
                    "data": {
                        "text/markdown": [
                            "**Document ID:** 2ea119c7-fc3d-4090-a47a-8dd2a0d37416<br>**Similarity:** None<br>**Text:** make nuclear reactors. But I kept at it, and in October 2013 he finally agreed. We decided he'd t...<br>"
                        ],
                        "text/plain": [
                            "<IPython.core.display.Markdown object>"
                        ]
                    },
                    "metadata": {},
                    "output_type": "display_data"
                }
            ],
            "source": [
                "display_response(response)"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "fd981e5e",
            "metadata": {},
            "source": [
                "# gpt-4-32k"
            ]
        },
        {
            "cell_type": "markdown",
            "id": "9d9f20a9",
            "metadata": {},
            "source": [
                "NOTE: not available yet"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "71137f57",
            "metadata": {},
            "outputs": [],
            "source": [
                "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0, model_name=\"gpt-4-32k\"))\n",
                "service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "bb619782",
            "metadata": {},
            "outputs": [],
            "source": [
                "gpt4_32k_index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)"
            ]
        },
        {
            "cell_type": "code",
            "execution_count": null,
            "id": "7d417f6a",
            "metadata": {},
            "outputs": [],
            "source": [
                "len(gpt4_32k_index._index_struct.nodes_dict)"
            ]
        }
    ],
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3 (ipykernel)",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.10.9"
        }
    },
    "nbformat": 4,
    "nbformat_minor": 5
}
